import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from IPython.display import HTML
# Load the CSV, parse the day/month/year `date` strings into a datetime
# column named `ds`, and order the rows chronologically.
input_file = 'owid-covid-data.csv'
df = (
    pd.read_csv(input_file)
    .assign(ds=lambda frame: pd.to_datetime(frame['date'], format="%d/%m/%Y"))
    .sort_values(by=['ds'])
)

Explore data

# `location` values that are removed before charting.
valuesToDrop = [
    'Asia',
    'World',
    'International',
    'European Union',
    'Europe',
    'North America',
    'Africa',
    'South America',
    'Oceania',
]

# Keep only rows whose location is not in the list above and that have a
# `new_cases` value.
df1 = df.loc[~df['location'].isin(valuesToDrop)].dropna(subset=['new_cases'])

# Sunburst over the continent -> location path, with both segment size and
# colour driven by `new_cases`.
cases_dist = px.sunburst(
    df1,
    path=['continent', 'location'],
    values='new_cases',
    color='new_cases',
    color_continuous_scale=px.colors.sequential.Magenta,
    title='Covid cases distribution',
)
# cases_dist.show()

Plotting covid new cases choropleth map

Time Series Analysis

from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly, add_changepoints_to_plot

# Build the daily series in the shape Prophet expects: one row per date with
# columns `ds` (timestamp) and `y` (value to forecast).
#
# `location` mixes individual countries with aggregate rows (the entries in
# `valuesToDrop`, e.g. 'World' and the continents). Summing `new_cases` over
# every row would count the same cases more than once, so the aggregate rows
# are excluded before grouping by date.
countries_only = df[~df['location'].isin(valuesToDrop)]
df4 = (
    countries_only.groupby(by=['ds'])['new_cases']
    .sum()
    .reset_index()
    .sort_values(by='ds', ascending=True)
    .rename(columns={'new_cases': 'y'})
)

# instantiate the model with default settings and fit it to the time series
model = Prophet().fit(df4)

# extend the timeline by 365 additional periods and predict over it
future = model.make_future_dataframe(periods=365)
forecast = model.predict(future)

# plot the forecast and overlay the changepoints found by the model
fig = model.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), model, forecast)
# fig;
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Initial log joint probability = -9.54837
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      70       938.957    0.00157276       128.333   1.907e-05       0.001      138  LS failed, Hessian reset 
      99       939.815   0.000128232       50.4087      0.7411      0.7411      177   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     149       940.955    0.00105143       72.7678   9.419e-06       0.001      274  LS failed, Hessian reset 
     199       941.504   1.17708e-05       69.7156      0.3262      0.3262      347   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     249       941.506   5.23168e-08        77.369      0.2579           1      423   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance

Hyperparameter tuning

# Second model: weekly seasonality switched off, with explicit
# changepoint_prior_scale and changepoint_range values.
m = Prophet(
    changepoint_prior_scale=0.4,
    changepoint_range=0.8,
    weekly_seasonality=False,
)
m.fit(df4)

# Forecast over the history plus 365 additional periods.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Large-format forecast figure with enlarged title, axis labels and ticks.
fig1 = m.plot(forecast, figsize=(20, 12))
ax = fig1.gca()
ax.set_title("Covid cases projection", size=24)
ax.set_xlabel('Date', size=20)
ax.set_ylabel('Cases', size=20)
ax.tick_params(axis="both", labelsize=18)
# Show plain numbers on the y axis instead of scientific notation.
ax.yaxis.get_major_formatter().set_scientific(False)